#Title: NSX Balance Check 1.0.do
#Authors: Jack Reilly, Jack Belk
#Date: 10.4.22
#Purpose: Balance Check for Experiment in Reilly & Belk, "Political And Social Discussion Network Survey Items are Not Interchangeable" (JEPS)
#Requires: 2020 CES - NCF Module Subset
#Output: figures (to working directory)

#Load libraries
library(tidyverse)
library(haven)
library(statip)
library(stargazer)

# Set working directory
wd <- file.path("~", "Desktop", "Working", "NSX", "reproduce")

# Read in the raw data as provided by YouGov
raw <- haven::read_sav(
  file.path(wd, "CCES20_NCF_OUTPUT_vv.sav")
)

# Recode the variables
df <- raw %>%
  mutate(
    
    age = 2020 - birthyr,
    
    female = if_else(gender == 2, 1, 0),
    
    education = case_when(
      educ == 1 ~ "No HS",
      educ == 2 ~ "High school graduate",
      educ == 3 ~ "Some college",
      educ == 4 ~ "2-year",
      educ == 5 ~ "4-year",
      educ == 6 ~ "Post-grad"
    ),
    
    race = case_when(
      race == 1 ~ "White",
      race == 2 ~ "Black",
      race == 3 ~ "Hispanic",
      race == 4 ~ "Asian",
      race == 5 ~ "Native American",
      race == 6 ~ "Two or more races",
      race == 7 ~ "Other race",
      race == 8 ~ "Middle Eastern"
    ),
    
    hispanic = case_when(
      hispanic == 9 ~ TRUE,
      hispanic == 1 ~ TRUE,
      hispanic == 8 ~ NA,
      T ~ FALSE
    ),
    
    marital_status = case_when(
      marstat == 1 ~ "Married",
      marstat == 2 ~ "Separated",
      marstat == 3 ~ "Divorced",
      marstat == 4 ~ "Widowed",
      marstat == 5 ~ "Never married",
      marstat == 6 ~ "Domestic / civil partnership",
    ),
    
    pid3 = case_when(
      pid7 %in% c(1, 2, 3) ~ "Democrat",
      pid7 == 4 ~ "Independent",
      pid7 %in% c(5, 6, 7) ~ "Republican",
      pid7 %in% c(8, 9) ~ "Not sure / Don't know"
    ),
    
    pid7 = case_when(
      pid7 == 1 ~ "1 Strong Democrat",
      pid7 == 2 ~ "2 Not very strong Democrat",
      pid7 == 3 ~ "3 Lean Democrat",
      pid7 == 4 ~ "4 Independent",
      pid7 == 5 ~ "5 Lean Republican",
      pid7 == 6 ~ "6 Not very strong Republican",
      pid7 == 7 ~ "7 Strong Republican",
      pid7 == 8 ~ "8 Not sure",
      pid7 == 9 ~ "9 Don't know"
    ),
    
    political_interest = case_when(
      newsint == 1 ~ "1 Most of the time",
      newsint == 2 ~ "2 Some of the time",
      newsint == 3 ~ "3 Only now and then",
      newsint == 4 ~ "4 Hardly at all",
      newsint == 5 ~ "5 Don't know",
    ),
    
    urbancity = case_when(
      urbancity == 5 & urbancity_t == "Urban" ~ "City",
      urbancity == 5 & urbancity_t == "Village" ~ "Rural area",
      urbancity == 1 ~ "City",
      urbancity == 2 ~ "Suburb",
      urbancity == 3 ~ "Town",
      urbancity == 4 ~ "Rural area"
    ),
    
    treatment = case_when(
      NCF710A_NCF710B_split == 1 ~ "Politics", 
      NCF710A_NCF710B_split == 2 ~ "Important Matters",
      T ~ NA_character_
    ),
    
    politics_treatment = case_when(
      is.na(treatment) ~ NA,
      treatment == "Politics" ~ T,
      T ~ F
    ),
    
    net_size = case_when(
      treatment == "Politics" ~ NCF710A,
      treatment == "Important Matters" ~ NCF710B
    ),
    
    net_size = net_size - 1,
    
    net_size_more_than_10 = case_when(
      is.na(net_size) ~ NA,
      net_size == 11 ~ T,
      T ~ F
    ),
    
    net_size_is_0 = case_when(
      is.na(net_size) ~ NA,
      net_size == 0 ~ T,
      T ~ F
    ) 
    
  ) %>%
  
  select(
    caseid,
    teamweight,
    treatment,
    politics_treatment,
    net_size,
    net_size_more_than_10,
    net_size_is_0,
    age,
    female,
    race,
    hispanic,
    education,
    marital_status,
    urbancity,
    pid3,
    pid7,
    political_interest,
    countyfips
  ) %>%
  haven::zap_labels() %>%
  haven::zap_label() %>%
  haven::zap_formats()

covars <- 
  c(
    "age",
    "female",
    "race",
    "education",
    "marital_status",
    "urbancity",
    "pid3",
    "political_interest"
  )

# There are 427 individuals assigned to the politics condition, and 415 assinged to the important
# matters condition. There are 158 individuals who were not assigned to a treatment condition. We
# drop them from the data.
df <- filter(df, !is.na(treatment))


# We relied on YouGov to implement the randomization. Are we able to predict treatment assignment
# using core demographic covariates?
balance_check <- 
  glm(
    formula = as.formula(paste("as.factor(treatment) ~ ", paste(covars, collapse = "+"))),
    family = "binomial",
    data = df
  )

# Our balance check suggests only minor imbalances in the randomization.
summary(balance_check)

# Can use this to make a latex table. Prints an ASCII table to console.
stargazer(
  balance_check, 
  title = "NCF Randomization Balance Check",
  dep.var.labels = "Treatment Assignment",
  type = "text", #type = "latex",
  #out = file.path(wd, "paper", "cces_balance_table_raw.tex"),
  keep.stat = c("n"),
  single.row = T,
  no.space = T,
  font.size = "footnotesize",
  align = T
)

# We also conduct a chi-squared test to assess whether there is a relationship between
# county of residence and treatment assignment. We do not find a meaningful relationship.
chisq.test(df$treatment, df$countyfips, simulate.p.value = T)


# Prepare data for plotting
plot_data <- 
  as_tibble(summary(balance_check)$coefficients, rownames = "term") %>%
  
  filter(term != "(Intercept)") %>%
  
  mutate(
  lower = Estimate - (1.96* `Std. Error`),
  upper = Estimate + (1.96* `Std. Error`),
  term_pretty = case_when(
    term == "urbancityTown" ~ "Place type: town",
    term == "urbancitySuburb" ~ "Place type: suburb",
    term == "urbancityRural area" ~ "Place type: rural area",
    term == "raceWhite" ~ "White",
    term == "raceTwo or more races" ~ "Two or more races",
    term == "raceOther race" ~ "Other race",
    term == "raceNative American" ~ "Native American",
    term == "raceHispanic" ~ "Hispanic",
    term == "raceBlack" ~ "Black",
    term == "political_interest4 Hardly at all" ~ "Watch news: hardly at all",
    term == "political_interest3 Only now and then" ~ "Watch news: only now and then",
    term == "political_interest2 Some of the time" ~ "Watch news: some of the time",
    term == "pid3Republican" ~ "PID: republican",
    term == "pid3Not sure / Don't know" ~ "PID: don't know",
    term == "pid3Independent" ~ "PID: independent",
    term == "marital_statusWidowed" ~ "Marital stat.: widowed",
    term == "marital_statusSeparated" ~ "Marital stat.: separated",
    term == "marital_statusNever married" ~ "Marital stat.: never married",
    term == "marital_statusMarried" ~ "Marital stat.: married",
    term == "marital_statusDomestic / civil partnership" ~ "Marital stat.: domestic partners",
    term == "female" ~ "Female",
    term == "educationSome college" ~ "Edu: some college",
    term == "educationPost-grad" ~ "Edu: post-grad",
    term == "educationNo HS" ~ "Edu: no high school",
    term == "educationHigh school graduate" ~ "Edu: high school",
    term == "education4-year" ~ "Edu: 4-year college",
    term == "age" ~ "Age"
  ),
  term_pretty = factor(term_pretty, levels = rev(term_pretty))
) 
  
# Create the plot
balance_check_plot <- 
  ggplot(
    data = plot_data,
    aes(x = Estimate, y = term_pretty)
    ) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper)) +
  geom_vline(xintercept = 0) +
  ylab("") +
  theme_light()

# Save
ggsave(file.path(wd, "balance_check_plot.png"), plot = balance_check_plot)
